Insert sound files into pumilio database

This notebook inserts sound files into the pumilio database, assuming all files are already organized inside the pumilio file structure (.../pumilio/sounds/sounds/'ColID'/'SiteID'/).

Required packages

Variable definitions

sound_directory – directory containing organized sound files to be added to the database
sound_db_filepath – path to output csv file containing sound records ready to import into database


In [8]:
sound_directory = ""

In [ ]:
sound_db_filename = ""

Import packages


In [7]:
import pandas

import re
from datetime import datetime
import os.path
from os import listdir

Functions

generate record of sound for database


In [10]:
def create_db_sound_record(sound, count):
    
    SoundID = count + 100
    SoundStatus = 0
    QualityFlagID = 0
    OtherSoundID = 0
    DerviedSound = ''
    DerviedFromSoundID = ''
    ColID = sound['ColID']
    DirID = sound['ID']
    SoundName = sound['filename'].rstrip('.flac')
    OriginalFilename = sound['filename']
    FileSize = os.path.getsize(os.path.join(sound_directory, ColID, DirID, OriginalFilename))
    MD5_hash = ''
    AudioPreviewFilename = ''
    AudioPreviewFormat = ''
    
    dt = datetime.strptime(SoundName, '%y%m%d-%H%M%S')
    
    Date = dt.strftime('%Y-%m-%d')
    Time = dt.strftime('%H:%M:%S')
    SamplingRate = 48000
    BitRate = 16
    Channels = 2
    Duration = 300
    SoundFormat = 'flac'
    SiteID = int(sound['ID']) + 100
    SensorID = 103
    Notes = ''
    SoundStats = 0
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    
    sound_table = pandas.DataFrame([[
                 SoundID,
                 SoundStatus,
                 QualityFlagID,
                 OtherSoundID,
                 DerviedSound,
                 DerviedFromSoundID,
                 ColID,
                 DirID,
                 SoundName,
                 OriginalFilename,
                 FileSize,
                 MD5_hash,
                 AudioPreviewFilename,
                 AudioPreviewFormat,
                 Date,
                 Time,
                 SamplingRate,
                 BitRate,
                 Channels,
                 Duration,
                 SoundFormat,
                 SiteID,
                 SensorID,
                 Notes,
                 SoundStats,
                 stamp
                 ]], columns=['SoundID', 'SoundStatus', 'QualityFlagID', 'OtherSoundID', 'DerivedSound', 'DerivedFromSoundID', 'ColID', 'DirID', 'SoundName', 'OriginalFilename', 'FileSize', 'MD5_hash', 'AudioPreviewFilename', 'AudioPreviewFormat', 'Date', 'Time', 'SamplingRate', 'BitRate', 'Channels', 'Duration', 'SoundFormat', 'SiteID', 'SensorID', 'Notes', 'SoundStats', 'stamp'])
    #print(sound_table)
    global sounds_table
    sounds_table = sounds_table.append(sound_table, ignore_index=True)

Get all sounds


In [9]:
collections = listdir(sound_directory)
ColIDs = []
IDs = []
filenames = []
for collection in collections:
    m = re.search('\d', collection)
    if m:
        ColID = m.string
        sites = listdir(os.path.join(sound_directory, ColID))
        for site in sites:
            m = re.search('\d', site)
            if m:
                ID = m.string
                sound_files = listdir(os.path.join(sound_directory, ColID, ID))
                for sound_file in sound_files:
                    m = re.search('\d{6}-\d{6}\.flac', sound_file)
                    if m:
                        filename = m.string
                        filenames.append(filename)
                        ColIDs.append(ColID)
                        IDs.append(ID)
sounds = pandas.DataFrame({'filename': filenames,
                           'ColID': ColIDs,
                           'ID': IDs})
sounds = sounds.sort_values(by='filename')

Create and export table (csv) of sound records


In [11]:
sounds_table = pandas.DataFrame()
count = 1
for index, sound in sounds.iterrows():
    create_db_sound_record(sound, count)
    count = count + 1
sounds_table.to_csv(sound_db_filepath)